# Remove every (non-hidden) object from the environment this script is
# evaluated in, so the steps below start from an empty workspace. Attached
# packages and options() are not affected.
# NOTE(review): when this file is source()d from a driver script at top level,
# the driver's own objects are removed as well -- confirm that is intended.
rm(list = ls())

## Input: BES online panel
## Output: Respondent by wave observations data.frame

## Load libraries

library(foreign) # v.0.8-82
library(readstata13) # v.0.10.0
library(tidyverse) # v.1.3.2

## Load data 

# Read the BES panel file, convert it to a tibble and keep only the first row
# found for each respondent id.
bes <- "data/UKDA-8810-stata/stata/stata13_se/bes2019_w20_ukdspanel_v0-2.dta" %>%
  read.dta13() %>%
  as_tibble() %>%
  distinct(id, .keep_all = TRUE)

# Convert BES data to respondent-wave observations 

# Respondent-by-wave skeleton: one row for every wave whose participation flag
# ("wave<N>") equals 1. The wave number is the part of the column name that
# follows "wave"; the "bes_ukds_id_" prefix is stripped from the id.
base_long <- bes %>%
  select(id, gender, starts_with("wave")) %>%
  pivot_longer(
    cols = starts_with("wave"),
    names_to = c("variable", "wave"),
    names_sep = "wave",
    values_to = "in_wave"
  ) %>%
  filter(in_wave == 1) %>%
  mutate(id = gsub("bes_ukds_id_", "", id)) %>%
  select(id, wave, gender, in_wave)


# redistSelf long tibble

# Redistribution self-placement per respondent and wave.
# "Don't know" becomes NA, the remaining responses are made numeric, and the
# scale is flipped by subtracting each score from the largest observed score
# so that higher values mean more left wing.
# NOTE(review): if the wave columns are factors, ifelse() returns their integer
# codes rather than the labels -- confirm the resulting scale is the intended
# one. The reversal also relies on the top category being observed in the data.
redistSelf_long <- bes %>%
  select(id, starts_with("redistSelf")) %>%
  pivot_longer(
    cols = starts_with("redistSelfW"),
    names_to = c("variable", "wave"),
    names_sep = "W"
  ) %>%
  mutate(
    id = gsub("bes_ukds_id_", "", id),
    score = as.numeric(ifelse(value == "Don't know", NA, value)),
    redistSelf = max(score, na.rm = TRUE) - score
  ) %>%
  select(id, wave, redistSelf)

# taxSpendSelf long tibble

# Tax-and-spend self-placement per respondent and wave, with "Don't know"
# recoded to NA and the remaining responses converted to numeric.
# NOTE(review): if the wave columns are factors, ifelse() returns their integer
# codes rather than the labels -- confirm the resulting scale is the intended
# one.
taxSpendSelf_long <- bes %>%
  select(id, starts_with("taxSpendSelf")) %>%
  pivot_longer(
    cols = starts_with("taxSpendSelfW"),
    names_to = c("variable", "wave"),
    names_sep = "W"
  ) %>%
  transmute(
    id = gsub("bes_ukds_id_", "", id),
    wave,
    taxSpendSelf = as.numeric(ifelse(value == "Don't know", NA, value))
  )

# soc2010: most recent non-missing occupation code (one row per respondent)

# One SOC 2010 code per respondent: start from the wave-19 value and, while it
# is still missing, fall back through the earlier columns in the order listed.
soc2010_long <- bes %>%
  select(id, starts_with("soc2010")) %>%
  transmute(
    id = gsub("bes_ukds_id_", "", id),
    soc2010 = Reduce(
      # keep the value found so far, otherwise take the next fallback column
      function(current, fallback) ifelse(is.na(current), fallback, current),
      list(
        soc2010W18,
        soc2010W17,
        soc2010W16W17W18,
        soc2010W6W7W8W9,
        soc2010W1W2W3W4W5
      ),
      soc2010W19
    )
  )


# workingStatus

# Unemployment indicator per respondent and wave.
# Several source columns cover a block of waves, so each is copied to one
# column per wave before reshaping. `unemployed` is 1 when the status is
# "Unemployed and looking for work", 0 for any other status, NA when missing.
workingStatus_long <- local({
  # Source column for each target wave column (there is no entry for wave 13).
  source_cols <- c(
    rep("workingStatusW1W2W3W4W5", 5),
    rep("workingStatusW6_W12", 7),
    rep("workingStatusW14W15", 2),
    rep("workingStatusW16W17W18", 3),
    "workingStatusW19",
    "workingStatusW20"
  )
  names(source_cols) <- paste0("workingStatusW", c(1:12, 14:20))

  bes %>%
    select(id, starts_with("workingStatus")) %>%
    # splice `workingStatusW<n> = <source column>` for every wave
    transmute(id, !!!lapply(source_cols, as.name)) %>%
    pivot_longer(
      cols = starts_with("workingStatus"),
      names_to = c("workingStatus", "wave"),
      names_pattern = "(.)W(.+)"
    ) %>%
    transmute(
      id = gsub("bes_ukds_id_", "", id),
      wave,
      unemployed = as.numeric(value == "Unemployed and looking for work")
    )
})

# workHomeCoronaSelf (wave 20 only; one row per respondent)

# Working-from-home status in wave 20, one row per respondent.
# Respondents not in paid work are assigned a category from their working
# status; respondents in work (or "Other") keep their own survey answer. Any
# working status not listed below yields NA.
workHome_long <- local({
  out_of_work <- c(
    "Unemployed and looking for work",
    "Retired",
    "Not in paid work for any other reason"
  )
  students_or_furloughed <- c(
    "Full time university student",
    "Other full time student",
    "Furloughed"
  )
  in_work_or_other <- c(
    "Working full time (30 or more hours per week)",
    "Working part time (8-29 hours per week)",
    "Working part time (less than 8 hours a week)",
    "Other"
  )

  bes %>%
    select(id, wave20, workingStatusW20,
           starts_with("workHomeCoronaSelfW20")) %>%
    transmute(
      id = gsub("bes_ukds_id_", "", id),
      workHomeCoronaSelf = factor(
        case_when(
          workingStatusW20 %in% out_of_work ~
            "I already regularly worked at home",
          workingStatusW20 %in% students_or_furloughed ~ "Yes",
          workingStatusW20 %in% in_work_or_other ~
            as.character(workHomeCoronaSelfW20)
        ),
        levels = c("No", "I already regularly worked at home", "Yes")
      ),
      # 1 = at home ("Yes" or already at home), 0 = "No", NA otherwise
      workHomeCoronaSelf_binary = case_when(
        workHomeCoronaSelf %in%
          c("Yes", "I already regularly worked at home") ~ 1,
        workHomeCoronaSelf == "No" ~ 0
      )
    )
})

# riskUnemployment long tibble

# Perceived risk of unemployment per respondent and wave, scored 1
# ("Very unlikely") to 5 ("Very likely"); any other response becomes NA.
riskUnemployment_long <- bes %>%
  select(id, starts_with("riskUnemployment")) %>%
  pivot_longer(
    cols = starts_with("riskUnemploymentW"),
    names_to = c("variable", "wave"),
    names_sep = "W"
  ) %>%
  transmute(
    id = gsub("bes_ukds_id_", "", id),
    wave,
    riskUnemployment = as.numeric(factor(
      value,
      levels = c("Very unlikely", "Fairly unlikely",
                 "Neither likely nor unlikely", "Fairly likely", "Very likely")
    ))
  )

# riskPoverty long tibble

# Perceived risk of poverty per respondent and wave, scored 1
# ("Very unlikely") to 5 ("Very likely"); any other response becomes NA.
riskPoverty_long <- bes %>%
  select(id, starts_with("riskPoverty")) %>%
  pivot_longer(
    cols = starts_with("riskPovertyW"),
    names_to = c("variable", "wave"),
    names_sep = "W"
  ) %>%
  transmute(
    id = gsub("bes_ukds_id_", "", id),
    wave,
    riskPoverty = as.numeric(factor(
      value,
      levels = c("Very unlikely", "Fairly unlikely",
                 "Neither likely nor unlikely", "Fairly likely", "Very likely")
    ))
  )

# Proximity to others

# Occupation-level proximity scores keyed on SOC 2010 code.
# proximity_binned splits the observed proximity range into three equal-width
# intervals. The labels are given to cut() directly so that all three levels
# always exist: wrapping cut() in factor(..., labels = ) drops an empty
# interval first and then fails because three labels no longer match the
# remaining levels. When every interval is populated the result is unchanged.
exposure <- read_csv("data/occupations_exposure_to_disease_data.csv") %>%
  transmute(soc2010 = `UK SOC 2010 Code`,
            proximity = `Proximity to others`,
            proximity_binned = cut(proximity, breaks = 3,
                                   labels = c("low", "mid", "high")))


# age in 2020

# Age in 2020, one row per respondent.
# Year of birth is taken as (survey year - reported age) from the most recent
# wave with a non-missing age, searching from wave 20 back to wave 1, and is
# then converted back to an age in 2020.
age_long <- local({
  # Calendar year used for each wave's age column, most recent wave first.
  survey_year <- c(
    2020,          # W20
    rep(2019, 5),  # W19-W15
    2018,          # W14
    rep(2017, 3),  # W13-W11
    rep(2016, 4),  # W10-W7
    rep(2015, 3),  # W6-W4
    rep(2014, 3)   # W3-W1
  )
  names(survey_year) <- paste0("ageW", 20:1)

  bes %>%
    transmute(
      id = gsub("bes_ukds_id_", "", id),
      age = 2020 - Reduce(
        # keep the birth year found so far, else derive it from the next wave
        function(known, col) {
          ifelse(is.na(known), survey_year[[col]] - bes[[col]], known)
        },
        names(survey_year)[-1],
        2020 - ageW20
      )
    )
})

# occupational unemployment rates

## 1 digit
# First digit of the SOC 2010 code, used as the join key for these rates.
soc2010_long <- mutate(
  soc2010_long,
  soc_major = as.numeric(substr(soc2010, 1, 1))
)

# Load the rates and recode gender: "f" becomes "Female", every other
# non-missing value becomes "Male".
occ_rates <- readstata13::read.dta13("data/LFS/our.dta")
occ_rates$gender <- ifelse(
  occ_rates$gender == "f",
  "Female",
  "Male"
)

## 3 digit
# First three digits of the SOC 2010 code, used as the join key for these rates.
soc2010_long <- mutate(
  soc2010_long,
  soc_minor = as.numeric(substr(soc2010, 1, 3))
)

# Load the rates and recode gender: "our_f" becomes "Female", every other
# non-missing value becomes "Male". Columns 2 to 5 get a "_3dig" suffix so
# they stay distinguishable from other columns after merging.
occ_rates_min <- readstata13::read.dta13("data/LFS/ourmin.dta")
occ_rates_min$gender <- ifelse(
  occ_rates_min$gender == "our_f",
  "Female",
  "Male"
)
colnames(occ_rates_min)[2:5] <- paste0(colnames(occ_rates_min)[2:5], "_3dig")

# subjective COVID experiences


# COVID-related attitudes and experiences for wave-20 participants, one row
# per respondent with wave fixed to "20".
covid_attitudes_battery <- local({
  # Position of each response within an ordered label set (NA if not listed).
  as_score <- function(x, ordered_labels) {
    as.numeric(factor(x, levels = ordered_labels))
  }
  # 1 for "Yes", 0 for "No", NA for anything else.
  yes_no <- function(x) {
    case_when(x == "Yes" ~ 1,
              x == "No" ~ 0)
  }

  bes %>%
    filter(wave20 == 1) %>%
    select(id, worryCoronaHealthW20, dutyCoronaW20, coronaDiedW20,
           CVsuspect_1W20, CVsuspect_2W20, CVsuspect_3W20,
           cvSeveritySelfW20, cvSeverityFamilyW20, cvSeverityCloseW20,
           cvFreedomSelfW20, cvEconSelfW20) %>%
    transmute(
      id = gsub("bes_ukds_id_", "", id),
      wave = as.character(20),
      worryCoronaHealth = as_score(
        worryCoronaHealthW20,
        c("Not at all worried", 1:9, "Extremely Worried")
      ),
      dutyCorona = as_score(
        dutyCoronaW20,
        c("Strongly disagree", "Disagree", "Neither agree nor disagree",
          "Agree", "Strongly agree")
      ),
      coronaDied = yes_no(coronaDiedW20),
      hadCovidSelf = yes_no(CVsuspect_1W20),
      hadCovidFamily = yes_no(CVsuspect_2W20),
      hadCovidFriend = yes_no(CVsuspect_3W20),
      # both trade-off scales are reversed: the first-listed label scores highest
      cvFreedomSelf = as_score(
        cvFreedomSelfW20,
        rev(c("Restrict personal freedom to reduce infections", 1:9,
              "Protect personal freedom even if it increases infections"))
      ),
      cvEconSelf = as_score(
        cvEconSelfW20,
        rev(c("Reduce infections even if it damages the economy", 1:9,
              "Save the economy even if it increases infections"))
      )
    )
})


# subjectiveClass

# Subjective class per respondent and wave. Each source column covers several
# waves, so it is copied to one column per wave before reshaping; rows with a
# missing answer are dropped.
subjClass_long <- local({
  # Source column for each target wave column.
  source_cols <- c(
    rep("subjClassW2_W4W7W9", 5),
    rep("subjClassW10W11", 2),
    rep("subjClassW12W14", 2)
  )
  names(source_cols) <- paste0("subjClassW", c(2, 3, 4, 7, 9, 10, 11, 12, 14))

  bes %>%
    select(id, starts_with("subjClass")) %>%
    # splice `subjClassW<n> = <source column>` for every wave
    transmute(id, !!!lapply(source_cols, as.name)) %>%
    pivot_longer(
      cols = starts_with("subjClass"),
      names_to = c("subjClass", "wave"),
      names_pattern = "(.)W(.+)",
      values_drop_na = TRUE
    ) %>%
    transmute(
      id = gsub("bes_ukds_id_", "", id),
      wave = as.character(wave),
      subjClass = value
    )
})

# region

# Region per respondent and wave, keeping only non-missing values. The wave is
# the part of the column name after "W".
region_long <- bes %>%
  select(id, starts_with("gor")) %>%
  pivot_longer(
    cols = starts_with("gor"),
    names_to = c("gor", "wave"),
    names_pattern = "(.)W(.+)",
    values_to = "region",
    values_drop_na = TRUE
  ) %>%
  transmute(
    id = gsub("bes_ukds_id_", "", id),
    wave = as.character(wave),
    region
  )


# education

# Highest qualification per respondent and wave, collapsed into broad levels.
# Columns whose name contains "age" are excluded before reshaping, rows with a
# missing qualification are dropped, and `educ` is returned as a factor with
# the level order given in `level_order`.
education_long <- local({
  # Survey answer -> broad qualification level.
  qualification_level <- c(
    "No formal qualifications" = "No qualifications",

    "Youth training certificate/skillseekers" = "Level 1",
    "Recognised trade apprenticeship completed" = "Level 1",
    "CSE grades 2-5" = "Level 1",

    "Clerical and commercial" = "Level 2",
    "City & Guilds certificate" = "Level 2",
    "CSE grade 1, GCE O level, GCSE, School Certificate" = "Level 2",
    "Scottish Ordinary/ Lower Certificate" = "Level 2",

    "City & Guilds certificate - advanced" = "Level 3",
    "ONC" = "Level 3",
    "GCE A level or Higher Certificate" = "Level 3",
    "Scottish Higher Certificate" = "Level 3",

    "Nursing qualification (e.g. SEN, SRN, SCM, RGN)" = "Level 4 and above",
    "Teaching qualification (not degree)" = "Level 4 and above",
    "University diploma" = "Level 4 and above",
    "University or CNAA first degree (e.g. BA, B.Sc, B.Ed)" = "Level 4 and above",
    "University or CNAA higher degree (e.g. M.Sc, Ph.D)" = "Level 4 and above",

    "Other technical, professional or higher qualification" = "Other",
    "Don't know" = "Don't know",
    "Prefer not to say" = "Prefer not to say"
  )
  level_order <- c("No qualifications", "Level 1", "Level 2", "Level 3",
                   "Level 4 and above", "Other", "Don't know",
                   "Prefer not to say")

  bes %>%
    select(id, starts_with("p_education")) %>%
    select(!contains("age")) %>%
    pivot_longer(
      cols = starts_with("p_education"),
      names_to = c("p_education", "wave"),
      names_pattern = "(.)W(.+)",
      values_to = "education",
      values_drop_na = TRUE
    ) %>%
    transmute(
      id = gsub("bes_ukds_id_", "", id),
      # round-trip through numeric, as the original did, before joining on text
      wave = as.character(as.numeric(wave)),
      # Disabled in the original:
      #   educ4 = ifelse(age >= 70, "Does not apply", educ4)
      educ = factor(
        recode(as.character(education), !!!qualification_level),
        levels = level_order
      )
    )
})


# income

# Gross household income per respondent and wave, keeping only non-missing
# values. The wave is the part of the column name after "W".
income_long <- bes %>%
  select(id, starts_with("p_gross_household")) %>%
  pivot_longer(
    cols = starts_with("p_gross_household"),
    names_to = c("p_gross_household", "wave"),
    names_pattern = "(.)W(.+)",
    values_to = "income",
    values_drop_na = TRUE
  ) %>%
  transmute(
    id = gsub("bes_ukds_id_", "", id),
    wave = as.character(wave),
    income
  )


# housing

# Housing response per respondent and wave, keeping only non-missing values.
# The wave is the part of the column name after "W".
housing_long <- bes %>%
  select(id, starts_with("p_housing")) %>%
  pivot_longer(
    cols = starts_with("p_housing"),
    names_to = c("p_housing", "wave"),
    names_pattern = "(.)W(.+)",
    values_to = "housing",
    values_drop_na = TRUE
  ) %>%
  transmute(
    id = gsub("bes_ukds_id_", "", id),
    wave = as.character(wave),
    housing
  )


## Merge

# Drop rows whose key measure is missing so that each table only contributes
# observed values to the merge.
redistSelf_long       <- drop_na(redistSelf_long, redistSelf)
taxSpendSelf_long     <- drop_na(taxSpendSelf_long, taxSpendSelf)
workingStatus_long    <- drop_na(workingStatus_long, unemployed)
soc2010_long          <- drop_na(soc2010_long, soc2010)
workHome_long         <- drop_na(workHome_long, workHomeCoronaSelf)
age_long              <- drop_na(age_long, age)
riskUnemployment_long <- drop_na(riskUnemployment_long, riskUnemployment)
riskPoverty_long      <- drop_na(riskPoverty_long, riskPoverty)

# Attach every wave-level and respondent-level table to the respondent-wave
# skeleton in base_long.
# left_join() is used instead of full_join(): the sample filter applied further
# down requires in_wave == 1, and in_wave only exists on rows that come from
# base_long. Rows found only in an attached table (id/wave pairs without a
# participation flag, occupations with no respondent) can therefore never
# reach the saved data; a full join only inflates the intermediate table with
# rows that have a missing id or in_wave. Join order is unchanged, so the
# column order of the result is the same as before.
bes_reduced <- base_long %>%
  # measures keyed on respondent and wave
  left_join(redistSelf_long, by = c("id", "wave")) %>%
  left_join(taxSpendSelf_long, by = c("id", "wave")) %>%
  left_join(workingStatus_long, by = c("id", "wave")) %>%
  left_join(riskUnemployment_long, by = c("id", "wave")) %>%
  left_join(riskPoverty_long, by = c("id", "wave")) %>%
  left_join(covid_attitudes_battery, by = c("id", "wave")) %>%
  # respondent-level attributes, repeated on every wave row
  left_join(soc2010_long, by = c("id")) %>%
  left_join(workHome_long, by = c("id")) %>%
  left_join(age_long, by = c("id")) %>%
  # occupation-level data; the keys come from soc2010_long joined above
  left_join(exposure, by = "soc2010") %>%
  left_join(occ_rates, by = c("soc_major", "gender")) %>%
  left_join(occ_rates_min, by = c("soc_minor", "gender")) %>%
  # remaining measures keyed on respondent and wave
  left_join(education_long, by = c("id", "wave")) %>%
  left_join(region_long, by = c("id", "wave")) %>%
  left_join(subjClass_long, by = c("id", "wave")) %>%
  left_join(income_long, by = c("id", "wave")) %>%
  left_join(housing_long, by = c("id", "wave"))


# Pick, for each row, the occupational unemployment rate column assigned to
# its wave (1-digit and 3-digit versions). Waves not listed get NA.
# `wave` is still character here; %in% matches it against the numbers by value.
bes_reduced <- mutate(
  bes_reduced,
  occ_unemp_1dig = case_when(
    wave %in% 20 ~ our_aprjun20,
    wave %in% 17:19 ~ our_octdec19,
    wave %in% 15:16 ~ our_janmar19,
    wave %in% 14 ~ our_aprjun18
  ),
  occ_unemp_3dig = case_when(
    wave %in% 20 ~ our_aprjun20_3dig,
    wave %in% 17:19 ~ our_octdec19_3dig,
    wave %in% 15:16 ~ our_janmar19_3dig,
    wave %in% 14 ~ our_aprjun18_3dig
  )
)

# Other variables

# Final derived variables and analysis sample.
# `pandemic` is 1 for waves after 19 and 0 otherwise; OccHumanProx is the
# occupation's proximity score multiplied by that indicator. The sample keeps
# participating respondent-waves from wave 14 on, with age above 15 and at
# most 65 (rows with a missing value in any of these conditions are dropped).
bes_reduced <- bes_reduced %>%
  mutate(
    wave = as.numeric(wave),
    pandemic = as.numeric(wave > 19),
    OccHumanProx = proximity * pandemic
  ) %>%
  filter(
    wave >= 14,
    in_wave == 1,
    age > 15,
    age <= 65
  )

# Write the respondent-by-wave data set to disk as an .Rdata file.
save(list = "bes_reduced", file = "working/bes_reduced.Rdata")



